#library

library(readr)
library(tidyselect)
library(tidyverse)
## Warning in as.POSIXlt.POSIXct(Sys.time()): unable to identify current timezone 'C':
## please set environment variable 'TZ'
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ dplyr   1.0.10
## ✔ tibble  3.1.8      ✔ stringr 1.4.1 
## ✔ tidyr   1.2.1      ✔ forcats 0.5.2 
## ✔ purrr   0.3.4      
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(broom)
library(vroom)
## Warning: package 'vroom' was built under R version 4.2.2
library(gganimate)
## Warning: package 'gganimate' was built under R version 4.2.2
library(maps)
## Warning: package 'maps' was built under R version 4.2.2
## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map
library(cowplot)
## Warning: package 'cowplot' was built under R version 4.2.2
library(tinytex)
## Warning: package 'tinytex' was built under R version 4.2.2

# Data sources:
#   Luxembourg mortality data: https://platform.who.int/mortality/countries/country-details/MDB/luxembourg
#   Worldwide noncommunicable-disease data: https://platform.who.int/mortality/themes/theme-details/MDB/noncommunicable-diseases
#   Age-structure data: https://ourworldindata.org/age-structure
# Read the datasets and tidy them.

# Mortality in Luxembourg: skip the first nine lines of the file and name the
# columns explicitly. "Dumm" is a spare trailing name.
Mortality <- read_csv(
  file = "data/Mortality_Database_Luxembourg.csv",
  col_names = c(
    "Indicator_Code", "Indicator_Name", "Year", "Sex",
    "Age_group_code", "Age_Group", "Number",
    "Percentage_of_cause_specific_deaths_out_of_total_deaths",
    "Age_standardized_death_rate_per_100000_standard_population",
    "Death_rate_per_100000_population",
    "Dumm"
  ),
  skip = 9,
  show_col_types = TRUE
)
## Rows: 397429 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Indicator_Code, Indicator_Name, Sex, Age_group_code, Age_Group
## dbl (5): Year, Number, Percentage_of_cause_specific_deaths_out_of_total_deat...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the imported table to inspect its first rows and column types.
Mortality
## # A tibble: 397,429 × 10
##    Indicato…¹ Indic…²  Year Sex   Age_g…³ Age_G…⁴ Number Perce…⁵ Age_s…⁶ Death…⁷
##    <chr>      <chr>   <dbl> <chr> <chr>   <chr>    <dbl>   <dbl>   <dbl>   <dbl>
##  1 CG0000     All Ca…  1967 All   Age_all [All]     4154     100    984.  1239. 
##  2 CG0000     All Ca…  1967 All   Age00   [0]        124     100     NA   2583. 
##  3 CG0000     All Ca…  1967 All   Age01_… [1-4]       20     100     NA     97.6
##  4 CG0000     All Ca…  1967 All   Age05_… [5-9]       18     100     NA     70.3
##  5 CG0000     All Ca…  1967 All   Age10_… [10-14]     11     100     NA     45.1
##  6 CG0000     All Ca…  1967 All   Age15_… [15-19]     18     100     NA     79.3
##  7 CG0000     All Ca…  1967 All   Age20_… [20-24]     22     100     NA    101. 
##  8 CG0000     All Ca…  1967 All   Age25_… [25-29]     25     100     NA    110. 
##  9 CG0000     All Ca…  1967 All   Age30_… [30-34]     37     100     NA    161. 
## 10 CG0000     All Ca…  1967 All   Age35_… [35-39]     65     100     NA    255. 
## # … with 397,419 more rows, and abbreviated variable names ¹​Indicator_Code,
## #   ²​Indicator_Name, ³​Age_group_code, ⁴​Age_Group,
## #   ⁵​Percentage_of_cause_specific_deaths_out_of_total_deaths,
## #   ⁶​Age_standardized_death_rate_per_100000_standard_population,
## #   ⁷​Death_rate_per_100000_population
# Mortality caused by noncommunicable diseases in Luxembourg (series starts in
# 1967). Seven leading lines of the file are skipped; the last, empty name is a
# placeholder that readr renames on import.
Noncauminicate_in_luxembourg <- read_csv(
  file = "data/Luxembourg/Luxembourg.csv",
  col_names = c(
    "Region_Code", "Region_Name", "Country_Code", "region",
    "Year", "Sex", "Age_group_code", "Age_Group", "Number",
    "Percentage_of_cause_specific_deaths_out_of_total_deaths",
    "Age_standardized_death_rate_per_100_000_standard_population",
    "Death_rate_per_100_000_population",
    ""
  ),
  skip = 7,
  show_col_types = TRUE
)
## New names:
## Rows: 53 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): Region_Code, Region_Name, Country_Code, region, Sex, Age_group_code... dbl
## (5): Year, Number, Percentage_of_cause_specific_deaths_out_of_total_deat...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...13`
# Print the imported table to inspect its first rows and column types.
Noncauminicate_in_luxembourg
## # A tibble: 53 × 12
##    Region_Code Regio…¹ Count…² region  Year Sex   Age_g…³ Age_G…⁴ Number Perce…⁵
##    <chr>       <chr>   <chr>   <chr>  <dbl> <chr> <chr>   <chr>    <dbl>   <dbl>
##  1 EU          Europe  LUX     Luxem…  1967 All   Age_all [All]     3407    82.0
##  2 EU          Europe  LUX     Luxem…  1968 All   Age_all [All]     3475    84.5
##  3 EU          Europe  LUX     Luxem…  1969 All   Age_all [All]     3561    85.2
##  4 EU          Europe  LUX     Luxem…  1970 All   Age_all [All]     3507    84.4
##  5 EU          Europe  LUX     Luxem…  1971 All   Age_all [All]     3730    84.7
##  6 EU          Europe  LUX     Luxem…  1972 All   Age_all [All]     3512    85.2
##  7 EU          Europe  LUX     Luxem…  1973 All   Age_all [All]     3542    84.8
##  8 EU          Europe  LUX     Luxem…  1974 All   Age_all [All]     3607    84.1
##  9 EU          Europe  LUX     Luxem…  1975 All   Age_all [All]     3737    85.4
## 10 EU          Europe  LUX     Luxem…  1976 All   Age_all [All]     3775    84.0
## # … with 43 more rows, 2 more variables:
## #   Age_standardized_death_rate_per_100_000_standard_population <dbl>,
## #   Death_rate_per_100_000_population <dbl>, and abbreviated variable names
## #   ¹​Region_Name, ²​Country_Code, ³​Age_group_code, ⁴​Age_Group,
## #   ⁵​Percentage_of_cause_specific_deaths_out_of_total_deaths
# Mortality caused by noncommunicable diseases, worldwide extract. Seven
# leading lines of the file are skipped; the last, empty name is a placeholder
# that readr renames on import.
Noncauminicate <- read_csv(
  file = "data/WHOMortalityDatabase_Map_Noncommunicable_diseases_14th_December_2022_17_17.csv",
  col_names = c(
    "Region_Code", "Region_Name", "Country_Code", "region",
    "Year", "Sex", "Age_group_code", "Age_Group", "Number",
    "Percentage_of_cause_specific_deaths_out_of_total_deaths",
    "Age_standardized_death_rate_per_100_000_standard_population",
    "Death_rate_per_100_000_population",
    ""
  ),
  skip = 7,
  show_col_types = TRUE
)
## New names:
## Rows: 291 Columns: 13
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): Region_Code, Region_Name, Country_Code, region, Sex, Age_group_code... dbl
## (4): Year, Number, Percentage_of_cause_specific_deaths_out_of_total_deat... lgl
## (2): Age_standardized_death_rate_per_100_000_standard_population, ...13
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...13`
# Print the imported table to inspect its first rows and column types.
Noncauminicate
## # A tibble: 291 × 13
##    Region_Code Regio…¹ Count…² region  Year Sex   Age_g…³ Age_G…⁴ Number Perce…⁵
##    <chr>       <chr>   <chr>   <chr>  <dbl> <chr> <chr>   <chr>    <dbl>   <dbl>
##  1 EU          Europe  ALB     Alban…  2000 All   Age75_… [75-79]   1835    87.5
##  2 EU          Europe  ALB     Alban…  2000 All   Age80_… [80-84]   1592    79.0
##  3 EU          Europe  ALB     Alban…  2000 All   Age85_… [85+]     1834    67.4
##  4 NAC         North … ATG     Antig…  2000 All   Age75_… [75-79]     51    89.5
##  5 NAC         North … ATG     Antig…  2000 All   Age80_… [80-84]     39    86.7
##  6 NAC         North … ATG     Antig…  2000 All   Age85_… [85+]       64    84.2
##  7 CSA         Centra… ARG     Argen…  2000 All   Age75_… [75-79]  30924    82.4
##  8 CSA         Centra… ARG     Argen…  2000 All   Age80_… [80-84]  28769    80.8
##  9 CSA         Centra… ARG     Argen…  2000 All   Age85_… [85+]    41270    78.1
## 10 AS          Asia    ARM     Armen…  2000 All   Age75_… [75-79]   2854    94.6
## # … with 281 more rows, 3 more variables:
## #   Age_standardized_death_rate_per_100_000_standard_population <lgl>,
## #   Death_rate_per_100_000_population <dbl>, ...13 <lgl>, and abbreviated
## #   variable names ¹​Region_Name, ²​Country_Code, ³​Age_group_code, ⁴​Age_Group,
## #   ⁵​Percentage_of_cause_specific_deaths_out_of_total_deaths
# Population age estimates by region and year. Seven leading lines of the file
# are skipped; the last, empty name is a placeholder that readr renames on
# import.
Population <- read_csv(
  file = "data/age_dependency_ratio_of_working_age_population.csv",
  col_names = c("region", "Code", "Year", "estimates_age", ""),
  skip = 7,
  show_col_types = TRUE
)
## New names:
## Rows: 18138 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): region, Code dbl (2): Year, estimates_age
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...5`
# Print the imported table to inspect its first rows and column types.
Population
## # A tibble: 18,138 × 4
##    region      Code   Year estimates_age
##    <chr>       <chr> <dbl>         <dbl>
##  1 Afghanistan AFG    1956          78.9
##  2 Afghanistan AFG    1957          79.2
##  3 Afghanistan AFG    1958          79.6
##  4 Afghanistan AFG    1959          79.8
##  5 Afghanistan AFG    1960          80.0
##  6 Afghanistan AFG    1961          80.2
##  7 Afghanistan AFG    1962          80.4
##  8 Afghanistan AFG    1963          80.7
##  9 Afghanistan AFG    1964          81.2
## 10 Afghanistan AFG    1965          82  
## # … with 18,128 more rows

# We want to see how mortality varies over the years, from 1960 to 2021
# (the Luxembourg rows printed above start in 1967).

# Animated chart of all-cause deaths for all ages combined: one line (with
# points) per sex, revealed progressively along the Year axis.
Mortality %>%
  filter(Indicator_Name == "All Causes" & Age_group_code == "Age_all") %>%
  ggplot(aes(x = Year, y = Number, group = Sex, color = Sex)) +
  geom_line() +
  geom_point() +
  labs(
    x = "Year",
    y = "Number",
    title = "Mortality rate over years in Luxembourg"
  ) +
  transition_reveal(Year)
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

# We now analyze a few major causes of mortality in Luxembourg and how these causes have affected each sex.

# Tally the rows per cause of death, largest counts first.
count(Mortality, Indicator_Name, sort = TRUE)
## # A tibble: 193 × 2
##    Indicator_Name                            n
##    <chr>                                 <int>
##  1 All Causes                             2703
##  2 Appendicitis                           2703
##  3 Birth asphyxia and birth trauma        2703
##  4 Breast cancer                          2703
##  5 Cardiovascular diseases                2703
##  6 Cataracts                              2703
##  7 Cerebrovascular disease                2703
##  8 Childhood-cluster diseases             2703
##  9 Chronic obstructive pulmonary disease  2703
## 10 Cirrhosis of the liver                 2703
## # … with 183 more rows
# Column chart of death counts per year, one facet per cause, restricted to
# rows with more than 1000 deaths.
# NOTE(review): rows for every sex and age group are stacked in the same bar,
# so totals may be counted more than once - confirm this is intended.
Mortality %>%
  filter(Number > 1000) %>%
  ggplot(aes(x = Year, y = Number)) +
  geom_col() +
  facet_wrap(vars(Indicator_Name))

# Number of deaths from noncommunicable diseases by age category

#summurase
#(?<=\\[).*(?=\\])

#Mortality

# Scatter of death counts from noncommunicable diseases by age group,
# coloured by sex (one point per row of the filtered table).
Mortality %>%
  filter(Indicator_Name == "Noncommunicable diseases") %>%
  # Order the age-group labels by their code instead of alphabetically.
  # `min` is used as the per-level summary because the codes are character
  # strings: the default summary (`median`) only yields a usable value for
  # strings when a level has an odd number of rows. This assumes each label
  # maps to a single code, as in the rows printed earlier in this script.
  mutate(Age_Group = fct_reorder(Age_Group, Age_group_code, .fun = min)) %>%
  ggplot(aes(x = Age_Group, y = Number, color = Sex)) +
  geom_point(size = 2, alpha = 0.5) +
  theme_minimal() +
  labs(title = "Yearly Culmulative mortality by NonComunication deseses in Luxembourg") +
  # Rotate the age-group labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90))
# To animate the chart by year, append `+ transition_reveal(Year)`.

# Find the year with the highest mortality for ages 54 to 74 in Luxembourg

# Outline of Luxembourg, taken from the world map polygons.
world_map <- map_data("world")
Luxembourg_map <- filter(world_map, region == "Luxembourg")

# Keep only the join key, the year and the death-rate column.
Noncauminicate_in_luxembourg <- select(
  Noncauminicate_in_luxembourg,
  region, Year, Death_rate_per_100_000_population
)

# Attach the yearly rates to the outline rows by region name.
Luxembourg_map_join <- Luxembourg_map %>%
  left_join(Noncauminicate_in_luxembourg, by = "region")

#Luxembourg_map_join

# Draw one map of Luxembourg per year, filled by that year's death rate.
#
# The fill scale shares the same limits across years so the maps can be
# compared; its upper limit is computed once, ignoring missing rates.
max_death_rate <- max(
  Noncauminicate_in_luxembourg$Death_rate_per_100_000_population,
  na.rm = TRUE
)

for (y in unique(Noncauminicate_in_luxembourg$Year)) {
  yearly_rates <- Noncauminicate_in_luxembourg %>%
    filter(Year == y)

  yearly_map <- ggplot(yearly_rates) +
    # Base layer: the country outline on a white fill.
    geom_map(
      data = Luxembourg_map, map = Luxembourg_map, aes(map_id = region),
      fill = "white", color = "#7f7f7f"
    ) +
    # Data layer: the same outline filled by the death rate of year `y`.
    geom_map(
      map = Luxembourg_map,
      aes(map_id = region, fill = Death_rate_per_100_000_population)
    ) +
    scale_fill_gradient(
      low = "white", high = "#cc4c02",
      name = "Death rate\nper 100 000",
      limits = c(0, max_death_rate)
    ) +
    # geom_map() does not set axis ranges itself, so take them from the outline.
    expand_limits(x = Luxembourg_map$long, y = Luxembourg_map$lat) +
    labs(title = paste("Death rate per 100 000 population in Luxembourg,", y))

  # ggplot objects are not auto-printed inside a loop, so print explicitly.
  # No gganimate transition is added here: each iteration renders a single
  # static frame, and a transition attached after print() would be discarded.
  print(yearly_map)
}

# Compare mortality caused by noncommunicable diseases in Luxembourg with other countries, for ages 54 to 74

# World polygons used as the base of the choropleth.
mapdata <- map_data("world")

# Keep only the join key and the value used to colour the map.
newDataForNonCauminicate <- Noncauminicate %>%
  select(region, Number)

# inner_join() keeps only polygon rows whose `region` also appears in the data.
# NOTE(review): the rows printed earlier show several records per region (one
# per age group), so every polygon vertex is repeated once per record here -
# confirm whether `Number` should be aggregated per region before joining.
mapdata_join <- inner_join(mapdata, newDataForNonCauminicate, by = "region")

# Drop rows without a death count. The column is referenced directly rather
# than through `mapdata_join$Number`, so the test always applies to the rows
# actually flowing through the pipe.
newMapdata <- mapdata_join %>%
  filter(!is.na(Number))

# Base choropleth: country polygons filled by `Number`, black borders, on a
# dark-blue panel with equal axis scaling.
map1 <- newMapdata %>%
  ggplot(aes(long, lat, group = group, fill = Number)) +
  geom_polygon(colour = "black") +
  coord_equal() +
  theme(panel.background = element_rect(fill = "#101045"))

# Final map: add the yellow-to-red fill scale and blank out axis text, ticks,
# axis titles and rectangle elements.
map2 <- map1 +
  theme(
    rect = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank()
  ) +
  scale_fill_gradient(
    low = "Yellow",
    high = "red",
    na.value = "grey50",
    name = "%Number of dies"
  )

#map2

# Compose the final figure on a cowplot canvas: an image first, then the map
# drawn on top of it.
# NOTE(review): the image path is absolute and machine-specific - confirm the
# file exists wherever this script is run.
ggdraw() +
  draw_image(
    image = "C:/Users/user/Documents/Data science/R/DataVisualization/bill.jpg",
    x = 0.35,
    y = 0.3,
    scale = 0.2
  ) +
  draw_plot(plot = map2)

# Life expectancy in 2000 compared with the rest of the world in 2000 (map), alongside noncommunicable diseases

# Print the Population table again for reference.
Population
## # A tibble: 18,138 × 4
##    region      Code   Year estimates_age
##    <chr>       <chr> <dbl>         <dbl>
##  1 Afghanistan AFG    1956          78.9
##  2 Afghanistan AFG    1957          79.2
##  3 Afghanistan AFG    1958          79.6
##  4 Afghanistan AFG    1959          79.8
##  5 Afghanistan AFG    1960          80.0
##  6 Afghanistan AFG    1961          80.2
##  7 Afghanistan AFG    1962          80.4
##  8 Afghanistan AFG    1963          80.7
##  9 Afghanistan AFG    1964          81.2
## 10 Afghanistan AFG    1965          82  
## # … with 18,128 more rows

# Case study of mortality from noncommunicable diseases (linear regression)

# Death counts from noncommunicable diseases by age group, coloured by sex.
# `colour` is mapped instead of `fill`: the default point shape has no fill,
# so a `fill` mapping leaves every point the same colour.
Mortality %>%
  filter(Indicator_Name == "Noncommunicable diseases") %>%
  ggplot(aes(x = Age_Group, y = Number, colour = Sex)) +
  geom_point()

  #geom_bar()
  
  #facet_wrap(~Sex)

#Prediction for 2023